1 R function to read echo files

R function called read.echo() to parse the input echo files and extract: * time * strain * strain rate * radial strain * radial strain rate * volume * segment volume

read.echo <- function(infile){

  # Read in the file into a single vector
  con <- readLines(infile)

  # Instantiate vectors
  frame.time <- vector()
  time.progression <- vector()
  strain <- vector()
  strain.rate <- vector()
  radial.strain <- vector()
  radial.strain.rate <- vector()
  volume <- vector()
  seg.vol <- vector()

  for(i in 1:length(con)){
    if(con[i]=="FrameTime(ms)"){
      frame.time <- strsplit(con[i+1], split="\t")[[1]]
      }

    if(con[i]=="TimeProgression(ms)"){
      time.progression <- strsplit(con[i+1], split="\t")[[1]]
      }

    if(con[i]=="Strain (%)"){
      j <- 1
      while(con[i+j] != ""){
        strain <- rbind(strain, strsplit(con[i+j], split="\t")[[1]])
        j <- j + 1
      }
    }

    if(con[i]=="StrainRate (1/s)"){
      j <- 1
      while(con[i+j] != ""){
        strain.rate <- rbind(strain.rate, strsplit(con[i+j], split="\t")[[1]])
        j <- j + 1
      }
    }

    if(con[i]=="RadialStrain (%)"){
      j <- 1
      while(con[i+j] != ""){
        radial.strain <- rbind(radial.strain, strsplit(con[i+j], split="\t")[[1]])
        j <- j + 1
      }
    }

    if(con[i]=="RadialStrainRate (1/s)"){
      j <- 1
      while(con[i+j] != ""){
        radial.strain.rate <- rbind(radial.strain.rate, strsplit(con[i+j], split="\t")[[1]])
        j <- j + 1
      }
    }

    if(con[i]=="Vol (ml)"){
      volume <- strsplit(con[i+1], split="\t")[[1]]
      }

    if(con[i]=="Seg_Vol (ml)"){
      j <- 1
      while(con[i+j] != ""){
        seg.vol <- rbind(seg.vol, strsplit(con[i+j], split="\t")[[1]])
        j <- j + 1
      }
    }

    }

  class(frame.time) <- "numeric"
  class(time.progression) <- "numeric"
  class(strain) <- "numeric"
  class(strain.rate) <- "numeric"
  class(radial.strain) <- "numeric"
  class(radial.strain.rate) <- "numeric"
  class(volume) <- "numeric"
  class(seg.vol) <- "numeric"

  return(list(frame.time=(frame.time),
              time.progression=(time.progression),
              strain=t(strain),
              strain.rate=t(strain.rate),
              radial.strain=t(radial.strain),
              radial.strain.rate=t(radial.strain.rate),
              volume=(volume),
              seg.vol=(seg.vol)))
}
# Read in the echos
echo1 <- read.echo("~/Projects/partho/data/pborsuk2c.txt")
echo2 <- read.echo("~/Projects/partho/data/pborsuk4c.txt")

# Get number of time points
nTimepoints <- dim(echo1$strain)[1]
nProbes <- dim(echo1$strain)[2]

2 Exploratory Analysis

2.1 Strain

2.2 Strain Rates

2.3 Radial Strain

2.4 Radial strain rates

3 Generating sample data

Generating by using multivariate normal distribution with means and covariance matrix from given time series.

3.1 Example: plots of randomly generated sample data

library(MASS)
set.seed(10)


# From first echo example
r1 <- mvrnorm(nProbes, mu=apply(echo1$strain, 1, mean), Sigma=cov(t(echo1$strain)))
plot(x=echo1$time.progression, y=r1[1,], ylim=c(min(r1),max(r1)),type="n", xlab="Time (ms)", ylab="Strain", main="Random Echo Generated from Echo 1")
apply(r1, 1, lines, x=echo1$time.progression)

## NULL
# From second echo example
r2 <- mvrnorm(nProbes, mu=apply(echo2$strain, 1, mean), Sigma=cov(t(echo2$strain)))
plot(x=echo2$time.progression, y=r2[1,], ylim=c(min(r2),max(r2)),type="n", xlab="Time (ms)", ylab="Strain",main="Random Echo Generated from Echo 2")
apply(r2, 1, lines, x=echo2$time.progression)

## NULL

3.1.1 Dealing with different times

The first echo has time data for 92 time points; the second has data for 93 time points. We can drop the last time point to facilitate analysis, if required later.

#plot(echo1$time.progression/echo2$time.progression[1:length(echo1$time.progression)])

t1 <- echo1$time.progression
t2 <- echo2$time.progression[1:length(echo1$time.progression)] #drop last time

4 Statistical Analysis

5 Trace of covariance matrix

The trace of the covariance matrix is equal to the total variance.

library(psych)

st1 <- echo1$strain
st2 <- echo2$strain

tr(cov(st1))
## [1] 3748.301
tr(cov(st2))
## [1] 1564.79

6 Tensor Analysis

6.1 Create tensors of sample data

Uses the rTensor package.

We create multidimensional arrays and then fill them in with simulated data with the following structure:

i = sample index (1 to n samples) j = index of LV point where strain is measured k = time

TODO: Make sure R x C are in correct order.

nSamples <- 25

# Set dimensions for an i by j by k tensor
# i = strain probe index
# j = time point index
# k = sample index 
tensorDim1 <- c(dim(echo1$strain)[2], dim(echo1$strain)[1], nSamples) 
samp1 <- array(data=NA, dim=tensorDim1)

for(i in 1:nSamples){ 
  samp1[, ,i] <- mvrnorm(nProbes, mu=apply(echo1$strain, 1, mean), Sigma=cov(t(echo1$strain)))
}

#Plot simulated data
# Probe 1, 25 individuals
plot(samp1[1,,1],type="l", ylim=c(min(samp1[1,,]),max(samp1[1,,]) ))
for(i in 2:nSamples){ lines(samp1[1,,i])}

# Probe 2, 25 individuals
plot(samp1[2,,1],type="l", ylim=c(min(samp1[2,,]),max(samp1[2,,]) ))
for(i in 2:nSamples){ lines(samp1[2,,i])}

# Creating tensor 2
tensorDim2 <- c(dim(echo2$strain)[2], dim(echo2$strain)[1], nSamples) 
samp2 <- array(data=NA, dim=tensorDim2)

for(i in 1:nSamples){ 
  samp2[, ,i] <- mvrnorm(nProbes, mu=apply(echo2$strain, 1, mean), Sigma=cov(t(echo2$strain)))
}

library(rTensor)
tnsr1 <- as.tensor(samp1)
tnsr2 <- as.tensor(samp2)

7 Principal component analysis, strain, actual data

ep1 <- prcomp(echo1$strain, scale=TRUE) # 70% by pc1
ep2 <- prcomp(echo2$strain, scale=TRUE) # 70% by pc1

ep1 <- prcomp(echo1$strain, scale=FALSE) # 70% by pc1
ep2 <- prcomp(echo2$strain, scale=FALSE) # 70% by pc1

#summary(ep1)
#summary(ep2)

#plot(ep1$rotation[,1], ep1$rotation[,2])
#plot(ep2$rotation[,1], ep2$rotation[,2])

plot(ep1$sdev, type="b",xlab="Principal component",ylab="Variance",main="Echo 1")

plot(ep2$sdev, type="b",xlab="Principal component",ylab="Variance",main="Echo 2")

plot(ep1$x[,1],type="l",xlab="Time",ylab="Rotated Data",main="Echo 1")
lines(ep1$x[,2],col="red")
lines(ep1$x[,3],col="blue")
lines(ep1$x[,4],col="green")

plot(ep2$x[,1],type="l",xlab="Time",ylab="Rotated Data",main="Echo 2")
lines(ep2$x[,2],col="red")
lines(ep2$x[,3],col="blue")
lines(ep2$x[,4],col="green")

8 Principal component analysis, strain rate, actual data

sr1 <- echo1$strain
sr2 <- echo2$strain

ep1 <- prcomp(echo1$strain.rate, scale=TRUE) # 70% by pc1
ep2 <- prcomp(echo2$strain.rate, scale=TRUE) # 70% by pc1

ep1 <- prcomp(echo1$strain.rate, scale=FALSE) # 70% by pc1
ep2 <- prcomp(echo2$strain.rate, scale=FALSE) # 70% by pc1

#summary(ep1)
#summary(ep2)

#plot(ep1$rotation[,1], ep1$rotation[,2])
#plot(ep2$rotation[,1], ep2$rotation[,2])

plot(ep1$sdev, type="b",xlab="Principal component",ylab="Variance",main="Echo 1")

plot(ep2$sdev, type="b",xlab="Principal component",ylab="Variance",main="Echo 2")

plot(ep1$x[,1],type="l",xlab="Time",ylab="Rotated Data",main="Echo 1")
lines(ep1$x[,2],col="red")
lines(ep1$x[,3],col="blue")
lines(ep1$x[,4],col="green")

plot(ep2$x[,1],type="l",xlab="Time",ylab="Rotated Data",main="Echo 2")
lines(ep2$x[,2],col="red")
lines(ep2$x[,3],col="blue")
lines(ep2$x[,4],col="green")

8.1 Compute total variances

# Total variances, actual data
tr(cov(st1))
## [1] 3748.301
tr(cov(st2))
## [1] 1564.79
# Total variances, simulated data
totvar1 <- numeric()
totvar2 <- numeric()

for(i in 1:nSamples){
  totvar1[i] <- tr(cov(t(samp1[,,i])))
  totvar2[i] <- tr(cov(t(samp2[,,i])))
  }

boxplot(totvar1, totvar2,names=c("Sample  ","Sample 2"),ylab="Total Variance")

9 Protein-Like Analysis

9.1 Root mean square deviation function

rmsd <- function(positions){
    # Input must be data frame with rows=times 
    # and columns=probe values (strains, strain rates, etc.)
  
    outputdf <- numeric(length=dim(positions)[1])
    
    for(i in 1:nrow(positions)){
      outputdf[i] <- sqrt(sum( (positions[i,] - positions[1,])^2 ) / length(positions[i,]))
    }
    return(rmsd=outputdf)
}

rmsd1 <- rmsd(echo1$strain)
rmsd2 <- rmsd(echo2$strain)

plot(rmsd1,type="l",xlab="Time",ylab="RMS Deviation", main="Echo 1 RMSD")

plot(rmsd2,type="l",xlab="Time",ylab="RMS Deviation", main="Echo 2 RMSD")

10 Root mean square fluctuation function

# RMSF is taken as average per particle, not per time (which is RMSD)
rmsf <- function(positions){
    # Input must be data frame with rows=times 
    # and columns=probe values (strains, strain rates, etc.)
  
    outputdf <- numeric(length=dim(positions)[2])
    
    for(i in 1:ncol(positions)){
      outputdf[i] <- sqrt(sum( (positions[,i] - positions[,1])^2 ) / length(positions[,i]))
    }
    return(rmsf=outputdf)
}

rmsf1 <- rmsf(echo1$strain)
rmsf2 <- rmsf(echo2$strain)

plot(rmsf1,type="l",xlab="Position Index",ylab="RMS Fluctuation",main="Echo 1 RMSF")

plot(rmsf2,type="l",xlab="Position Index",ylab="RMS Fluctuation",main="Echo 2 RMSF")

11 Generation of random RMSD sequences

Model the sample RMSDs as ARIMA processes, and simulate from those ARIMA models.

12 Function to generate random RMSDs

library(forecast)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Loading required package: timeDate
## This is forecast 6.2
m2 <- auto.arima(rmsd2)

simulate_RMSDs <- function(RMSDvec, n=100){

  m1 <- auto.arima(RMSDvec)
  nSimRMSD <- n; RMSDmat <- matrix(data=NA, nrow=length(RMSDvec), ncol=nSimRMSD)

  for(i in 1:nSimRMSD){
    RMSDmat[,i] <-  simulate.Arima(m1) }
  
  return(RMSDmat)
  }

13 Plot RMSD simulations

This is an example of the randomly generated sequences.

randRMSDs1 <- simulate_RMSDs(rmsd1,n=10)
randRMSDs2 <- simulate_RMSDs(rmsd2, n=10)

plot(randRMSDs1[,1],type="l",ylim=c(min(randRMSDs1), max(randRMSDs1) ),xlab="Time Index",ylab="RMSD",main="Randomly Generated RMSDs")

for(i in 1:dim(randRMSDs1)[2]){ lines(randRMSDs1[,i],type="l", col=i) }

14 Simulated RMSD trajectory statistics

# Total variance of RMSDs
tr(cov(randRMSDs1))
## [1] 419.3347
tr(cov(randRMSDs2))
## [1] 614.6101
# Generalized Variance of RMSDs
det(cov(randRMSDs1))
## [1] 1.014382e+12
det(cov(randRMSDs2))
## [1] 460300028
eigs1 <- eigen(cov(randRMSDs1))
eigs2 <- eigen(cov(randRMSDs2))

plot(eigs1$values, type="b",ylab="Eigenvalue",xlab="Eigenvalue Index",main="Echo 1 Simulations")

plot(eigs2$values, type="b",ylab="Eigenvalue",xlab="Eigenvalue Index",main="Echo 2 Simulations")

plot(eigs1$vectors, xlab="PC1", ylab="PC2",main="Eigenvectors of RMSD Trajectories, Sim. 1")

plot(eigs2$vectors, xlab="PC1", ylab="PC2",main="Eigenvectors of RMSD Trajectories, Sim. 2")

cor(eigs1$vectors)
##               [,1]        [,2]         [,3]        [,4]        [,5]
##  [1,]  1.000000000  0.07102367 -0.016744181  0.06039411 -0.12254644
##  [2,]  0.071023669  1.00000000  0.040121311 -0.14471241  0.29363778
##  [3,] -0.016744181  0.04012131  1.000000000  0.03411666 -0.06922656
##  [4,]  0.060394106 -0.14471241  0.034116665  1.00000000  0.24969128
##  [5,] -0.122546444  0.29363778 -0.069226556  0.24969128  1.00000000
##  [6,] -0.034320759  0.08223716 -0.019387817  0.06992936 -0.14189455
##  [7,] -0.023337405  0.05591956 -0.013183314  0.04755052 -0.09648535
##  [8,]  0.006211853 -0.01488444  0.003509079 -0.01265680  0.02568207
##  [9,]  0.093719534 -0.22456462  0.052942218 -0.19095577  0.38747077
## [10,] -0.052274792  0.12525744 -0.029530060  0.10651113 -0.21612307
##               [,6]         [,7]         [,8]        [,9]       [,10]
##  [1,] -0.034320759 -0.023337405  0.006211853  0.09371953 -0.05227479
##  [2,]  0.082237160  0.055919565 -0.014884435 -0.22456462  0.12525744
##  [3,] -0.019387817 -0.013183314  0.003509079  0.05294222 -0.02953006
##  [4,]  0.069929360  0.047550516 -0.012656797 -0.19095577  0.10651113
##  [5,] -0.141894549 -0.096485353  0.025682067  0.38747077 -0.21612307
##  [6,]  1.000000000 -0.027022005  0.007192604  0.10851634 -0.06052814
##  [7,] -0.027022005  1.000000000  0.004890822  0.07378886 -0.04115788
##  [8,]  0.007192604  0.004890822  1.000000000 -0.01964081  0.01095523
##  [9,]  0.108516335  0.073788859 -0.019640809  1.00000000  0.16528390
## [10,] -0.060528138 -0.041157879  0.010955232  0.16528390  1.00000000